/**************************************************************************
* This macro adds connects DB plan records to the BR firmid 
/*************************************************************************/

%macro dbxwalktobr (yy);

	/*First link to the f55brb xwalk file*/
	proc sql;
		create table db&yy._brx as 
		select *
		from db&yy as a left join
		f55b&yy..f55bf&yy._ssel_xwk as b
		on a.filing_id_char eq b.DLN_1 & a.opr_ein = b.ein5500 & a.opr_pn = b.plnnum;
	quit;
	
	/*Some dln's are not matched. Go back and try to get these by EIN*/
	data db&yy._brx_nm db&yy._brx_m;
		set db&yy._brx (drop = year);	/*same as form_year*/
		if flag=. then output db&yy._brx_nm;
		else output db&yy._brx_m;
	run;
	
	data db&yy._brx_nm;
		set db&yy._brx_nm
		(drop = dln 
			dln_1
			formid
			plnnum
			active
			ein5500
			fileyear
			firmid
			flag
			gov
			mu);
	run;
	
	proc sql;
		create table db&yy._brx_nm1 as 
		select *
		from db&yy._brx_nm as a inner join
		f55b&yy..f55bf&yy._ssel_xwk as b
		on a.opr_ein = b.ein5500;
	quit;
	
	/*De-dup*/
	proc sort data = db&yy._brx_nm1 nodupkey;
		by opr_ein opr_pn;
	run;
	
	proc sort data = db&yy._brx_nm;
		by opr_ein opr_pn;
	
	/*Non-matches after ein merge*/
	data db&yy._brx_nm2;
		merge db&yy._brx_nm1 (in=matched) db&yy._brx_nm (in=all);
		by opr_ein opr_pn;
		if all=/*redacted*/ & matched=/*redacted*/;
	run;
	
	/*Re-stack the initial non-matches*/
	data db&yy._brx_rm;
		set db&yy._brx_nm1 db&yy._brx_nm2;
	run;
	
	/*Re-stack all the data*/
	data db&yy._brx_fin;
		set db&yy._brx_m db&yy._brx_rm;
	run;

       /*Create a file to compute match rate disclosure stats for the F5500-BR match*/
	data discl_f55br&yy (keep=opr_ein opr_pn adj_DB_partcp match_br plan_year_end);
	    set db&yy._brx_fin;
	    match_br=/*redacted*/;
	    if flag in /*redacted*/ then match_br=/*redacted*/;
	run;
	
	/*Tabulate the quality of the match*/
	proc sort data = db&yy._brx_fin;
		by flag;
	run;
	
	proc means noprint data = db&yy._brx_fin;
		by flag;
		output out = ptcp_by_match
		sum(adj_DB_partcp) = adj_DB_partcp;
	run;

	data ptcp_by_match (drop=_type_);
		set ptcp_by_match (rename= (_freq_=num_firms adj_DB_partcp=num_active_ptcp));
	run;

	proc export data = ptcp_by_match
		outfile = "/.../data/f55brmatchstat_&yy..csv"
		dbms = csv replace;
	run;
	
	/*Split by SU and MU */
	
	data db&yy._brx_su db&yy._brx_mu;
		length alpha /*redacted*/;
		length ein_br /*redacted*/;
		set db&yy._brx_fin;
		t=/*redacted*/;
		if mu=/*redacted*/ then ein_br= /*redacted*/;
		if mu=/*redacted*/ then alpha= /*redacted*/;
		if flag in /*redacted*/ then output db&yy._brx_su;
		else if flag in /*redacted*/ then output db&yy._brx_mu;
	run;
	
	/*Merge with BR files*/
	
	/*MU first*/
	
	proc sort data = br&yy..ssl&yy.mu out= mu&yy (keep=ein alpha sic REMP RAP);
		by alpha;
	run;

	/*Find the modal industry*/
	proc freq data=mu&yy noprint;
		by alpha;
		tables sic / out = mu&yy._cnt;
	run;

	proc sort data = mu&yy._cnt;
		by alpha descending count;
	run;

	data mu&yy._cnt;
		set mu&yy._cnt;
		by alpha;
		if first.alpha then mode=1;
	run;

	data mu&yy._cnt (keep=alpha sic);
		set mu&yy._cnt;
		where mode=/*redacted*/;
	run;
	
	proc means noprint data = mu&yy;
		by alpha;
		output out = mu&yy._size 
		sum(REMP) = esize_br
		sum(RAP) = epayroll_br;
	run;

	proc sql;
		create table mu&yy._all as
		select a.alpha, a.esize_br, a.epayroll_br, b.sic as sic_br
		from mu&yy._size as a left join
		mu&yy._cnt as b 
		on a.alpha = b.alpha;
	quit;
	
	proc sql;
		create table db&yy._brx_mu_fin as 
		select a.*, b.alpha, b.esize_br, b.epayroll_br, b.sic_br
		from db&yy._brx_mu as a inner join
		mu&yy._all as b 
		on a.alpha = b.alpha;
	quit; 
	
		
	/*SU, next*/
	proc sql;	
		create table db&yy._brx_su1 as
		select a.*, b.ACEMP, b.AC943E, b.ACQP1, b.ACQP2, b.ACQP3, b.ACQP4, b.AC943P, b.sic as sic_br, b.ein 
		from db&yy._brx_su as a left join
		br&yy..ssl&yy.su as b on 
		a.ein_br= b.ein;
	quit;

	/*Toss plans that map to a firmid more than once.*/

	proc sort data = db&yy._brx_su1;
		by firmid;
	run;

	proc means noprint data = db&yy._brx_su1;
		by firmid;
		output out = br_id_cnt sum(t) = frequency;
	run;

	proc sql;
		create table db&yy._brx_su2 as 
		select a.*, b.firmid, b.frequency
		from db&yy._brx_su1 as a left join
		br_id_cnt as b
		on a.firmid = b.firmid;
	quit;

	data db&yy._brx_su3 db&yy._brx_su4;
		set db&yy._brx_su2;
		if frequency = /*redacted*/ then output db&yy._brx_su3; /*clean no-dups*/
		else if frequency = /*redacted*/ then output db&yy._brx_su4; /*with dups*/
	run;

	/*Clean the dup file by picking only exact filing_id duplicates that are from the research file*/
	proc sort data = db&yy._brx_su4;
		by filing_id_char;
	run;

	proc means noprint data = db&yy._brx_su4;
		by filing_id_char;
		output out = filing_id_dup sum(t) = num_dup_filings;
	run;
	
	proc sql;
		create table db&yy._brx_su5 as 
		select a.*, b.filing_id_char, b.num_dup_filings
		from db&yy._brx_su4 as a left join
		filing_id_dup as b 
		on a.filing_id_char = b.filing_id_char;
	quit;

	data db&yy._brx_su5;
		set db&yy._brx_su5;
		where num_dup_filings=/*redacted*/ & source_id=/*redacted*/; /*pick the record from the research file*/
		if ACEMP=. & AC943E=. then delete; /*toss records that have no active employment and are duplicates*/
	run;

	data db&yy._brx_su6 (drop = t num_dup_filings);
		set db&yy._brx_su5 db&yy._brx_su3;
	run;
	
	/*Get employer size data for the de-duped list*/	
	data db&yy._brx_su_fin (drop=ACEMP AC943E AC943P ACQP1 ACQP2 ACQP3 ACQP4);
		set db&yy._brx_su6;
		esize_br = ACEMP;
		epayroll_br = sum(ACQP1,ACQP2,ACQP3,ACQP4);
		if esize_br in /*redacted*/ then esize_br=AC943E; 
		if epayroll_br in /*redacted*/ then epayroll_br=AC943P; 
	run;

	/*Re-stack the su and mu files*/
	data db&yy._brx_fin (drop= alpha ein_br t year frequency ein
		large partcp_account_bal_cnt last_rpt_plan_num last_rpt_spons_ein
		rename = (emplr_contrib_income_amt_n=db_emplr_contrib
			  employee_contrib_income_amt_n=db_emplee_contrib));
		set db&yy._brx_mu_fin db&yy._brx_su_fin;
	run;

	/*Add firm age, size, and pay from LBD*/
	proc sql;
	      create table db&yy._brx_fin_t as 
	      select a.*, b.firmage, b.pay as lbd_pay, b.emp as lbd_emp
	      from db&yy._brx_fin as a left join
	      lbd&yy..firm_&yy._emp_c201600 as b 
	      on a.firmid = b.firmid;
	quit;
	
	data temp&yy;
	set db&yy._brx_fin_t ;
	found_lbd=1;
	if lbd_emp=. then found_lbd=0;
	run;

	proc freq data = temp&yy;
	tables found_lbd*mu;
	run;
	
	/*Compute coverage rates*/
	data blue.db&yy._brx_fin;
	  set db&yy._brx_fin_t;
	  cov_rate_br_adj=.;
	  size = lbd_emp;
	  if size = . & esize_br>0 then size = esize_br;
	  cov_rate_br_adj=.;
	  if size>0 then cov_rate_br_adj=adj_DB_partcp/size;
	  cov_rate_br =.;
	  if size>0 then cov_rate_br=DB_partcp/size;	  

	proc datasets lib=work nolist;
	    delete dc&yy._firmid db&yy._brx_fin_t db&yy._brx_fin db&yy._brx_su_fin db&yy._brx_mu_fin 
	    db&yy._brx_su6 db&yy._brx_su5 db&yy._brx_su4 db&yy._brx_su3 db&yy._brx_su2 db&yy._brx_su1
	    mu&yy._all mu&yy._cnt mu&yy db&yy._brx_su db&yy._brx_mu db&yy._brx_fin
	    db&yy._brx_m db&yy._brx_rm db&yy._brx_nm1 db&yy._brx_nm2 db&yy._brx_nm db&yy._brx_m db&yy._brx temp&yy;
	quit;
	run;
		

%mend dbxwalktobr;

/*No pre-made bridge files from F5500 to BR after 2012, I reconstruct the bridge here*/

%macro dbxwalktobr1214 (yy);

	/*Drop any obvious duplicates within year*/
	proc sort data=db&yy nodupkey;
		by opr_ein opr_pn;
	run;
	
	/*Set of unique EINs*/
	proc sort data = db&yy nodupkey out=dbeins (keep=opr_ein);
		by opr_ein;
	run;

	/*SU BR file*/
	proc sort data = br&yy..ssl&yy.su nodupkey out = sueins (keep=ein act empunit_typ);
		by ein;
	run;

	data sueins (drop=empunit_typ);
	    length firmid /*redacted*/;
	    set sueins;
	    firmid = /*redacted*/;
	    mu=/*redacted*/;
	    inbr=/*redacted*/;
	    if empunit_typ=/*redacted*/ then delete;
	run;

	/*MU BR file*/
	proc sort data = br&yy..ssl&yy.mu nodupkey out = mueins (keep=alpha ein act);
		by ein;
	run;

	data mueins (drop=alpha);
	    length firmid /*redacted*/;
	    set mueins;
	    firmid = /*redacted*/;
	    mu=/*redacted*/;
	    inbr=/*redacted*/;
	run;

	/*Define active for MU's based on any ein*/
	data mueins;
	  set mueins;
	  active_n = /*redacted*/;
	  if act=/*redacted*/ then active_n = /*redacted*/;
	run;

	proc sort data = mueins;
	by firmid;
	run;

	proc means noprint data = mueins;
	    by firmid;
	    output out = mu_active_stat (keep = firmid active_all_flag)
	    max(active_n) = active_all_flag;
	run;

	proc sql;
	      create table mueins2 as 
	      select a.*, b.active_all_flag
	      from mueins as a inner join
	      mu_active_stat as b 
	      on a.firmid = b.firmid;
	quit;

	data mueins2 (drop = active_n);
	    set mueins2 (drop=act);
	    act = /*redacted*/;
	    if active_all_flag = /*redacted*/ then act=/*redacted*/;
	run;
	    
	data breins (keep= ein firmid mu inbr act);
	      set sueins mueins2;
	run;

	/*Merge 5500-EINs to BR-EINs*/
	proc sql;
	    create table f55bf&yy as 
	    select a.*,b.*
	    from dbeins as a left join
	    breins as b
	    on a.opr_ein = b.ein;
	quit;

	data f55bf&yy (keep = ein firmid flag active mu);
	    set f55bf&yy;
	    if inbr=. then flag=/*redacted*/;
	    else if inbr=/*redacted*/ & mu=/*redacted*/ & act=/*redacted*/ then flag=/*redacted*/;
	    else if inbr=/*redacted*/ & mu=/*redacted*/ & act=/*redacted*/ then flag=/*redacted*/;
	    else if inbr=/*redacted*/ & mu=/*redacted*/ & act=/*redacted*/ then flag=/*redacted*/;
	    else if inbr=/*redacted*/ & mu=/*redacted*/ & act=/*redacted*/ then flag=/*redacted*/;
	    active=/*redacted*/;
	    if act=/*redacted*/ then active=/*redacted*/;
	run;
  
	/*Merge back with F5500 file*/
	proc sql;
	      create table db&yy._brx_fin as 
	      select a.*, b.firmid, b.flag, b.active, b.mu
	      from db&yy as a left join
	      f55bf&yy as b
	      on a.opr_ein = b.ein;
	quit;

       /*Create a file to compute match rate disclosure stats for the F5500-BR match*/
	data discl_f55br&yy (keep=opr_ein opr_pn adj_DB_partcp match_br plan_year_end);
	    set db&yy._brx_fin;
	    match_br=/*redacted*/;
	    if flag in /*redacted*/ then match_br=/*redacted*/;
	run;
	
	/*Tabulate the quality of the match*/
	proc sort data = db&yy._brx_fin;
		by flag;
	run;
	
	proc means noprint data = db&yy._brx_fin;
		by flag;
		output out = ptcp_by_match
		sum(adj_DB_partcp) = adj_DB_partcp;
	run;

	data ptcp_by_match (drop=_type_);
		set ptcp_by_match (rename= (_freq_=num_firms adj_DB_partcp=num_active_ptcp));
	run;

	proc export data = ptcp_by_match
		outfile = "/.../data/f55brmatchstat_&yy..csv"
		dbms = csv replace;
	run;
	
	/*Split by SU and MU*/
	
	data db&yy._brx_su db&yy._brx_mu;
		length alpha /*redacted*/;
		length ein_br /*redacted*/;
		set db&yy._brx_fin;
		t=/*redacted*/;
		if mu=/*redacted*/ then ein_br= /*redacted*/;
		if mu=/*redacted*/ then alpha= /*redacted*/;
		if flag in /*redacted*/ then output db&yy._brx_su;
		else if flag in /*redacted*/ then output db&yy._brx_mu;
	run;
	
	/*Merge with BR files*/
	
	/*MU first*/
	
	proc sort data = br&yy..ssl&yy.mu out= mu&yy (keep=ein alpha sic REMP RAP);
		by alpha;
	run;

	/*Find the modal industry*/
	proc freq data=mu&yy noprint;
		by alpha;
		tables sic / out = mu&yy._cnt;
	run;

	proc sort data = mu&yy._cnt;
		by alpha descending count;
	run;

	data mu&yy._cnt;
		set mu&yy._cnt;
		by alpha;
		if first.alpha then mode=/*redacted*/;
	run;

	data mu&yy._cnt (keep=alpha sic);
		set mu&yy._cnt;
		where mode=/*redacted*/;
	run;
	
	proc means noprint data = mu&yy;
		by alpha;
		output out = mu&yy._size 
		sum(REMP) = esize_br
		sum(RAP) = epayroll_br;
	run;

	proc sql;
		create table mu&yy._all as
		select a.alpha, a.esize_br, a.epayroll_br, b.sic as sic_br
		from mu&yy._size as a left join
		mu&yy._cnt as b 
		on a.alpha = b.alpha;
	quit;
	
	proc sql;
		create table db&yy._brx_mu_fin as 
		select a.*, b.alpha, b.esize_br, b.epayroll_br, b.sic_br
		from db&yy._brx_mu as a inner join
		mu&yy._all as b 
		on a.alpha = b.alpha;
	quit; 
	
	/*SU, next*/
	proc sql;	
		create table db&yy._brx_su1 as
		select a.*, b.ACEMP, b.AC943E, b.ACQP1, b.ACQP2, b.ACQP3, b.ACQP4, b.AC943P, b.sic as sic_br, b.ein 
		from db&yy._brx_su as a left join
		br&yy..ssl&yy.su as b on 
		a.ein_br= b.ein;
	quit;

	/*Toss plans that map to a firmid more than once.*/
	
	proc sort data = db&yy._brx_su1;
		by firmid;
	run;

	proc means noprint data = db&yy._brx_su1;
		by firmid;
		output out = br_id_cnt sum(t) = frequency;
	run;

	proc sql;
		create table db&yy._brx_su2 as 
		select a.*, b.firmid, b.frequency
		from db&yy._brx_su1 as a left join
		br_id_cnt as b
		on a.firmid = b.firmid;
	quit;

	data db&yy._brx_su3 db&yy._brx_su4;
		set db&yy._brx_su2;
		if frequency = /*redacted*/ then output db&yy._brx_su3; /*clean no-dups*/
		else if frequency = /*redacted*/ then output db&yy._brx_su4; /*with dups*/
	run;

	/*Clean the dup file by picking only exact filing_id duplicates that are from the research file*/
	proc sort data = db&yy._brx_su4;
		by filing_id_char;
	run;

	proc means noprint data = db&yy._brx_su4;
		by filing_id_char;
		output out = filing_id_dup sum(t) = num_dup_filings;
	run;
	
	proc sql;
		create table db&yy._brx_su5 as 
		select a.*, b.filing_id_char, b.num_dup_filings
		from db&yy._brx_su4 as a left join
		filing_id_dup as b 
		on a.filing_id_char = b.filing_id_char;
	quit;

	data db&yy._brx_su5;
		set db&yy._brx_su5;
		where num_dup_filings=/*redacted*/ & source_id=/*redacted*/; /*pick the record from the research file*/
		if ACEMP=. & AC943E=. then delete; /*toss records that have no active employment and are duplicates*/
	run;

	data db&yy._brx_su6 (drop = t num_dup_filings);
		set db&yy._brx_su5 db&yy._brx_su3;
	run;
	
	/*Get employer size data for the de-duped list*/	
	data db&yy._brx_su_fin (drop=ACEMP AC943E AC943P ACQP1 ACQP2 ACQP3 ACQP4);
		set db&yy._brx_su6;
		esize_br = ACEMP;
		epayroll_br = sum(ACQP1,ACQP2,ACQP3,ACQP4);
		if esize_br in /*redacted*/ then esize_br=AC943E; 
		if epayroll_br in /*redacted*/ then epayroll_br=AC943P; 
	run;

	/*Re-stack the su and mu files*/
	data db&yy._brx_fin (drop= alpha ein_br t frequency ein
		large partcp_account_bal_cnt last_rpt_plan_num last_rpt_spons_ein
		rename = (emplr_contrib_income_amt_n=db_emplr_contrib
			  employee_contrib_income_amt_n=db_emplee_contrib));
		set db&yy._brx_mu_fin db&yy._brx_su_fin;
	run;

	/*Add firm age, size, and pay from LBD*/
	proc sql;
	      create table db&yy._brx_fin_t as 
	      select a.*, b.firmage, b.pay as lbd_pay, b.emp as lbd_emp
	      from db&yy._brx_fin as a left join
	      lbd&yy..firm_&yy._emp_c201600 as b 
	      on a.firmid = b.firmid;
	quit;
	
	data temp&yy;
	set db&yy._brx_fin_t ;
	found_lbd=/*redacted*/;
	if lbd_emp=. then found_lbd=/*redacted*/;
	run;

	proc freq data = temp&yy;
	tables found_lbd*mu;
	run;
	
	/*Compute coverage rates*/
	data blue.db&yy._brx_fin;
	  set db&yy._brx_fin_t;
	  cov_rate_br_adj=.;
	  size = lbd_emp;
	  if size = . & esize_br>0 then size = esize_br;
	  cov_rate_br_adj=.;
	  if size>0 then cov_rate_br_adj=adj_DB_partcp/size;
	  cov_rate_br =.;
	  if size>0 then cov_rate_br=DB_partcp/size;	  

	proc datasets lib=work nolist;
	    delete dc&yy._firmid db&yy._brx_fin_t db&yy._brx_su_fin db&yy._brx_mu_fin 
	    db&yy._brx_su6 db&yy._brx_su5 db&yy._brx_su4 db&yy._brx_su3 db&yy._brx_su2 db&yy._brx_su1
	    mu&yy._all mu&yy._cnt mu&yy db&yy._brx_su db&yy._brx_mu db&yy._brx_fin
	    dbeins mueins mueins2 sueins breins f55bf&yy temp&yy;
	quit;
	run;
		

%mend dbxwalktobr1214;



